/*
 * LZMA2InputStream
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <http://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

package org.tukaani.xz;

import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

import org.tukaani.xz.lz.LZDecoder;
import org.tukaani.xz.lzma.LZMADecoder;
import org.tukaani.xz.rangecoder.RangeDecoder;

/**
 * Decompresses a raw LZMA2 stream (no XZ headers).
 */
public class LZMA2InputStream extends InputStream
{
  /**
   * Smallest valid LZMA2 dictionary size.
   * <p>
   * Very tiny dictionaries would be a performance problem, so the minimum is 4
   * KiB.
   */
  public static final int DICT_SIZE_MIN = 4096;

  /**
   * Largest dictionary size supported by this implementation.
   * <p>
   * The LZMA2 algorithm allows dictionaries up to one byte less than 4 GiB.
   * This implementation supports only 16 bytes less than 2 GiB for raw LZMA2
   * streams, and for .xz files the maximum is 1.5 GiB. This limitation is due
   * to Java using signed 32-bit integers for array indexing. The limitation
   * shouldn't matter much in practice since so huge dictionaries are not
   * normally used.
   */
  public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15;

  private static final int COMPRESSED_SIZE_MAX = 1 << 16;

  private DataInputStream in;

  private final LZDecoder lz;

  private final RangeDecoder rc = new RangeDecoder(COMPRESSED_SIZE_MAX);

  private LZMADecoder lzma;

  private int uncompressedSize = 0;

  private boolean isLZMAChunk;

  private boolean needDictReset = true;

  private boolean needProps = true;

  private boolean endReached = false;

  private IOException exception = null;


  /**
   * Gets approximate decompressor memory requirements as kibibytes for the
   * given dictionary size.
   * 
   * @param dictSize LZMA2 dictionary size as bytes, must be in the range [
   *          <code>DICT_SIZE_MIN</code>, <code>DICT_SIZE_MAX</code>]
   * 
   * @return approximate memory requirements as kibibytes (KiB)
   */
  public static int getMemoryUsage(int dictSize)
  {
    // The base state is aroudn 30-40 KiB (probabilities etc.),
    // range decoder needs COMPRESSED_SIZE_MAX bytes for buffering,
    // and LZ decoder needs a dictionary buffer.
    return 40 + COMPRESSED_SIZE_MAX / 1024 + getDictSize(dictSize) / 1024;
  }


  private static int getDictSize(int dictSize)
  {
    if (dictSize < DICT_SIZE_MIN || dictSize > DICT_SIZE_MAX)
      throw new IllegalArgumentException("Unsupported dictionary size " + dictSize);

    // Round dictionary size upward to a multiple of 16. This way LZMA
    // can use LZDecoder.getPos() for calculating LZMA's posMask.
    // Note that this check is needed only for raw LZMA2 streams; it is
    // redundant with .xz.
    return (dictSize + 15) & ~15;
  }


  /**
   * Creates a new input stream that decompresses raw LZMA2 data from
   * <code>in</code>.
   * <p>
   * The caller needs to know the dictionary size used when compressing; the
   * dictionary size isn't stored as part of a raw LZMA2 stream.
   * <p>
   * Specifying a too small dictionary size will prevent decompressing the
   * stream. Specifying a too big dictionary is waste of memory but
   * decompression will work.
   * <p>
   * There is no need to specify a dictionary bigger than the uncompressed size
   * of the data even if a bigger dictionary was used when compressing. If you
   * know the uncompressed size of the data, this might allow saving some
   * memory.
   * 
   * @param in input stream from which LZMA2-compressed data is read
   * 
   * @param dictSize LZMA2 dictionary size as bytes, must be in the range [
   *          <code>DICT_SIZE_MIN</code>, <code>DICT_SIZE_MAX</code>]
   */
  public LZMA2InputStream(InputStream in, int dictSize)
  {
    this(in, dictSize, null);
  }


  /**
   * Creates a new LZMA2 decompressor using a preset dictionary.
   * <p>
   * This is like <code>LZMAInputStream(InputStream, int)</code> except that the
   * dictionary may be initialized using a preset dictionary. If a preset
   * dictionary was used when compressing the data, the same preset dictionary
   * must be provided when decompressing.
   * 
   * @param in input stream from which LZMA2-compressed data is read
   * 
   * @param dictSize LZMA2 dictionary size as bytes, must be in the range [
   *          <code>DICT_SIZE_MIN</code>, <code>DICT_SIZE_MAX</code>]
   * 
   * @param presetDict preset dictionary or <code>null</code> to use no preset
   *          dictionary
   */
  public LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict)
  {
    // Check for null because otherwise null isn't detect
    // in this constructor.
    if (in == null)
      throw new NullPointerException();

    this.in = new DataInputStream(in);
    this.lz = new LZDecoder(getDictSize(dictSize), presetDict);

    if (presetDict != null && presetDict.length > 0)
      needDictReset = false;
  }

  private static final byte[] BUFFER_READ = new byte[1];


  /**
   * Decompresses the next byte from this input stream.
   * <p>
   * Reading lots of data with <code>read()</code> from this input stream may be
   * inefficient. Wrap it in <code>java.io.BufferedInputStream</code> if you
   * need to read lots of data one byte at a time.
   * 
   * @return the next decompressed byte, or <code>-1</code> to indicate the end
   *         of the compressed stream
   * 
   * @throws CorruptedInputException
   * 
   * @throws XZIOException if the stream has been closed
   * 
   * @throws EOFException compressed input is truncated or corrupt
   * 
   * @throws IOException may be thrown by <code>in</code>
   */
  public int read() throws IOException
  {
    return read(BUFFER_READ, 0, 1) == -1 ? -1 : (BUFFER_READ[0] & 0xFF);
  }


  /**
   * Decompresses into an array of bytes.
   * <p>
   * If <code>len</code> is zero, no bytes are read and <code>0</code> is
   * returned. Otherwise this will block until <code>len</code> bytes have been
   * decompressed, the end of LZMA2 stream is reached, or an exception is
   * thrown.
   * 
   * @param buf target buffer for uncompressed data
   * @param off start offset in <code>buf</code>
   * @param len maximum number of uncompressed bytes to read
   * 
   * @return number of bytes read, or <code>-1</code> to indicate the end of the
   *         compressed stream
   * 
   * @throws CorruptedInputException
   * 
   * @throws XZIOException if the stream has been closed
   * 
   * @throws EOFException compressed input is truncated or corrupt
   * 
   * @throws IOException may be thrown by <code>in</code>
   */
  public int read(byte[] buf, int off, int len) throws IOException
  {
    if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
      throw new IndexOutOfBoundsException();

    if (len == 0)
      return 0;

    if (in == null)
      throw new XZIOException("Stream closed");

    if (exception != null)
      throw exception;

    if (endReached)
      return -1;

    try
    {
      int size = 0;

      while (len > 0)
      {
        if (uncompressedSize == 0)
        {
          decodeChunkHeader();
          if (endReached)
            return size == 0 ? -1 : size;
        }

        int copySizeMax = Math.min(uncompressedSize, len);

        if (!isLZMAChunk)
        {
          lz.copyUncompressed(in, copySizeMax);
        } else
        {
          lz.setLimit(copySizeMax);
          lzma.decode();
        }

        int copiedSize = lz.flush(buf, off);
        off += copiedSize;
        len -= copiedSize;
        size += copiedSize;
        uncompressedSize -= copiedSize;

        if (uncompressedSize == 0)
          if (!rc.isFinished() || lz.hasPending())
            throw new CorruptedInputException();
      }

      return size;

    } catch (IOException e)
    {
      exception = e;
      throw e;
    }
  }


  private void decodeChunkHeader() throws IOException
  {
    int control = in.readUnsignedByte();

    if (control == 0x00)
    {
      endReached = true;
      return;
    }

    if (control >= 0xE0 || control == 0x01)
    {
      needProps = true;
      needDictReset = false;
      lz.reset();
    } else if (needDictReset)
    {
      throw new CorruptedInputException();
    }

    if (control >= 0x80)
    {
      isLZMAChunk = true;

      uncompressedSize = (control & 0x1F) << 16;
      uncompressedSize += in.readUnsignedShort() + 1;

      int compressedSize = in.readUnsignedShort() + 1;

      if (control >= 0xC0)
      {
        needProps = false;
        decodeProps();

      } else if (needProps)
      {
        throw new CorruptedInputException();

      } else if (control >= 0xA0)
      {
        lzma.reset();
      }

      rc.prepareInputBuffer(in, compressedSize);

    } else if (control > 0x02)
    {
      throw new CorruptedInputException();

    } else
    {
      isLZMAChunk = false;
      uncompressedSize = in.readUnsignedShort() + 1;
    }
  }


  private void decodeProps() throws IOException
  {
    int props = in.readUnsignedByte();

    if (props > (4 * 5 + 4) * 9 + 8)
      throw new CorruptedInputException();

    int pb = props / (9 * 5);
    props -= pb * 9 * 5;
    int lp = props / 9;
    int lc = props - lp * 9;

    if (lc + lp > 4)
      throw new CorruptedInputException();

    lzma = new LZMADecoder(lz, rc, lc, lp, pb);
  }


  /**
   * Returns the number of uncompressed bytes that can be read without blocking.
   * The value is returned with an assumption that the compressed input data
   * will be valid. If the compressed data is corrupt,
   * <code>CorruptedInputException</code> may get thrown before the number of
   * bytes claimed to be available have been read from this input stream.
   * <p>
   * In LZMAInputStream, the return value will be non-zero when the decompressor
   * is in the middle of an LZMA2 chunk. The return value will then be the
   * number of uncompressed bytes remaining from that chunk.
   * 
   * @return the number of uncompressed bytes that can be read without blocking
   */
  public int available() throws IOException
  {
    if (in == null)
      throw new XZIOException("Stream closed");

    if (exception != null)
      throw exception;

    return uncompressedSize;
  }


  /**
   * Closes the stream and calls <code>in.close()</code>. If the stream was
   * already closed, this does nothing.
   * 
   * @throws IOException if thrown by <code>in.close()</code>
   */
  public void close() throws IOException
  {
    if (in != null)
    {
      try
      {
        in.close();
      } finally
      {
        in = null;
      }
    }
  }
}
